
global Truck "/Users/yuanningliang/Truck/"

*-------------------------------------------------------------------------
/* figure 1 and figure A24 */
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta",clear

local insp_F
forvalues t = 12(-2)2{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

* save memory:
keep crash `insp_F' insp_p insp_m  event_time crash_event_18hr flag_oos new_VIN year month dayofweek post_insp


reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 & flag_oos ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 & flag_oos ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if e(sample)

* including oos
reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 , absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if e(sample)


* plot figure 1
	// all event study coefficients are copied from the regression results to a separate figure.dta 
	// much more efficient than using estout etc. because regressions take too long 

clear all

set obs 14

gen event_day = (_n - 8)*2

rename var2 crash_18hr
rename var3 lb_18hr
rename var4 ub_18hr

local dep_mean = 0.0000637

gen pct_18hr = crash_18hr / `dep_mean' * 100
gen pct_18hr_lb = lb_18hr / `dep_mean' * 100
gen pct_18hr_ub = ub_18hr / `dep_mean' * 100

summ pct_18hr if inrange(event_day,-14,-2)
gen avg_18hr_F = r(mean)

summ pct_18hr if inrange(event_day,0,12)
gen avg_18hr_L = r(mean)

* 
summ avg_18hr_F
local avg_18hr_F = r(mean)
summ avg_18hr_L
local avg_18hr_L = r(mean)

local color "forest_green"

twoway (rarea pct_18hr_ub pct_18hr_lb event_day if event_day <= -4 ,fcolor(`color'*0.2) lcolor(`color'*0.01)) ///
	(rarea pct_18hr_ub pct_18hr_lb event_day if inrange(event_day,0,12), fcolor(`color'*0.2) lcolor(`color'*0.01)) ///
	(connected pct_18hr event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color') lcolor(`color') lpattern(solid) lwidth(medium)) ///
	(line avg_18hr_F event_day if inrange(event_day,-14,0), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(line avg_18hr_L event_day if inrange(event_day,0,12), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(scatteri `avg_18hr_F' 0 `avg_18hr_L' 0 , recast(line) lp(solid) lcolor(gs4) lwidth(0.6)), ///
	legend(off) xlabel(-14(2)12) ylabel(-10(10)60, nogrid) ///
	text(20 4 "change=44.6%", place(c) color(`color') ) ///
	xtitle("day") ytitle("% change (day-2=0)") ///
	graphregion(color(white)) xsize(4.5)

graph export "plots/division_100pct/new/crash_18hr_100pct.pdf", replace	



* plot figure A24
use "$Truck/r&r/new results/revision_plots.dta",clear 

rename (var52 var53 var54) (noos lb_noos ub_noos)

local mean = 0.0000639
foreach var of varlist noos lb_noos ub_noos {
	replace `var' = `var' / `mean' * 100
}

summ noos if inrange(event_day,-14,-2)
gen noos_F = r(mean)

summ noos if inrange(event_day,0,12)
gen noos_L = r(mean)

*
summ noos_F
local noos_F = r(mean)
summ noos_L
local noos_L = r(mean)

local color_treat "black"

local var noos
local xaxis event_day
twoway (rarea lb_`var' ub_`var' event_day if event_day <= -4, color(`color_treat'%20) lcolor(bg)) ///
	(rarea lb_`var' ub_`var' event_day if inrange(event_day,0,12), color(`color_treat'%20) lcolor(bg)) ///
	(connected `var' event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color_treat') lcolor(`color_treat') lpattern(solid) lwidth(medium)) ///
	(line noos_F event_day if inrange(event_day,-14,0), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(line noos_L event_day if inrange(event_day,0,12), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(scatteri `noos_L' 0 `noos_F' 0 , recast(line) lp(solid) lcolor(gs10%50) lwidth(0.6)), ///
	legend(off) xlabel(-14(2)12, nogrid) ylabel(-10(10)60, nogrid) ///
	text(20 5 "change=43.5%", place(c) color(`color') ) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(4.5)
	
graph export "$Truck/r&r/new results/include_oos.pdf", replace	





*-------------------------------------------------------------------------
/* figure 2 */
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_month2_100pct.dta", clear

local insp_F
forvalues t = 12(-1)2{
local insp_F `insp_F' F`t'insp
}
local insp_F `insp_F' inspection
forvalues t = 1(1)24 {
local insp_F `insp_F' L`t'insp
}

reghdfe crash `insp_F', absorb(new_VIN year month) vce(cluster new_VIN)

reghdfe crash post_insp, absorb(new_VIN year month) vce(cluster new_VIN)


summ crash if e(sample)

* combine with re-insp rate
use "$Truck/CISER server/outputs/Log/plots/figures_Sep2019/figures",clear

local color "navy"
local color2 "gs10"

twoway (rarea pct_month2_lb pct_month2_ub event_month if event_month <= -2 ,fcolor(`color'%20) lcolor(`color'%1) ) ///
	(rarea pct_month2_lb pct_month2_ub event_month if inrange(event_month,0,24), fcolor(`color'%20) lcolor(`color'%1)) ///
	(scatter pct_month2 event_month if inrange(event_month,-12,-1), msize(small) mcolor(`color') lcolor(`color') msymbol(Dh)) ///
	(scatter pct_month2 event_month if inrange(event_month,0,24), msize(small) mcolor(`color')) ///
	(line pct_month2 event_month if inrange(event_month,-12,24), lcolor(`color') lpattern(solid) lwidth(medium)) ///
	(bar reinsp2 event_month if inrange(event_month,-12,24), yaxis(2) fintensity(inten30) fcolor(`color2'%40) lcolor(`color2'%60) lpattern(solid) lwidth(thin)) ///
	(bar reinsp2 event_month  if event_month == 1, yaxis(2) fintensity(inten80) fcolor(`color2'%40) lcolor(`color2'%60) lpattern(solid) lwidth(thin)) ///
	(bar reinsp2 event_month  if event_month == 12, yaxis(2) fintensity(inten80) fcolor(`color2'%40) lcolor(`color2'%60) lpattern(solid) lwidth(thin)) ///
	(bar reinsp2 event_month  if event_month == 24, yaxis(2) fintensity(inten80) fcolor(`color2'%40) lcolor(`color2'%60) lpattern(solid) lwidth(thin)) ///
	, legend(off) xlabel(-12(2)24) ylabel(-20(5)20, nogrid) ylabel(0(20)80 240, nogrid axis(2)) ///
	text(-15.5 1 "20%",place(c) )  text(-6.9 12 "74%",place(c))  text(-4.8 23.6 "87%",place(c))  ///
	xtitle("Month") ytitle("% change in crash (month-1=0)") ytitle("Re-inspection probability (%)", axis(2)) ///
	graphregion(color(white)) xsize(6)	
	
graph export "$Truck/outputs/Log/plots/division_100pct/month_new.pdf", replace	



*-------------------------------------------------------------------------
/* figure 3 */

use "$Truck/processed_data/InspectionData/Insp_by_year/temp/Insp_100pct_VIN.dta", clear

* drop special event inspections
rename (insp_day insp_month insp_year insp_dow) (day month year dayofweek)

* 1. roadcheck 72 hours
gen roadcheck_day = 1 if month == 6 & day <= 8 & day > 1 & dayofweek == 2

gen date_roadcheck_day = date if roadcheck_day == 1
bysort year: egen date_roadcheck = max(date_roadcheck_day)

replace roadcheck_day = 1 if date == date_roadcheck + 1 
replace roadcheck_day = 1 if date == date_roadcheck + 2

* 2. brake week
gen brakeweek_day = 1 if month == 9 & day > 5 & day <= 12 & dayofweek == 0

gen date_brakeweek_day = date if brakeweek_day == 1
bysort year: gegen date_brakeweek = max(date_brakeweek_day)
replace date_brakeweek = mdy(9,7,2017) if year == 2017

replace brakeweek_day = 1 if inrange(date, date_brakeweek, date_brakeweek+6)


* 3. brake day
gen brakecheck_day = 1 if month == 5 & day <= 7 & dayofweek == 3

* drop special event inspections
drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1

* only fixed station
keep if insp_facility == "F"

* count the number of trucks inspected at F
bys new_VIN: gen temp = 1
count if temp == 1

* look at the date to next inspection
sort new_VIN insp_date insp_start_time

*isid inspection_id insp_date

gegen seq_insp = group(insp_date insp_start_time inspection_id)  

bysort new_VIN: egen rank_insp = rank(seq_insp)

tsset new_VIN rank_insp

gen D_insp_date = F.date - date


* 1 year by week
gen group = .
foreach i of numlist 1/52 { 
	replace group = `i' if D_insp_date <= `i'*7 & group == .
}
replace group = 53 if group == . & D_insp_date ~= .  // Re-inspect time > 1 year
replace group = 54 if D_insp_date == .  // NO Re-inspection

drop if D_insp_date == 0 // drop inspection twice a day
drop if D_insp_date == .

gisid inspection_id date 
gisid new_VIN date

compress 
save "$Truck/processed_data/InspectionData/Insp_by_year/new/reinsp_truck.dta", replace

gcollapse (sum) inspection, by(new_VIN group)

bys group: egen group_insp = sum(inspection)

bys group: keep if _n == 1

egen total = sum(group_insp)

gen pct = group_insp/total

sort group
gen cum_pct = sum(pct)

* cumulative cdf plot

twoway (connected cum_pct group if group <= 52, lcolor(black) mcolor(black) msymbol(Oh) msize(small)) ///
	(scatter cum_pct group if group == 1, mcolor(red) msymbol(D) msize(small)) ///
	(scatter cum_pct group if group == 13, mcolor(red) msymbol(D) msize(small)) ///
	(scatter cum_pct group if group == 52, mcolor(red) msymbol(D) msize(small)), ///
	xlabel(1(4)49 52) ylabel(0(.1).8, nogrid) legend(off) ///
	xtitle("Number of weeks since last inspection") ytitle("Probability") ///
	text(.05 1.6 "7.1%",place(c) )  text(0.35 14 "39.3%",place(c))  text(0.68 52 "73.9%",place(c))  ///
	graphregion(color(white)) 

graph export "$Truck\outputs\Log\Plots\division_100pct\reinsp_week_1yr_new.pdf", replace



*-------------------------------------------------------------------------
/* figure 4 and Table 2*/
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta",clear

local insp_F
forvalues t = 12(-2)2{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}
* save memory:
keep crash `insp_F' insp_p insp_m  event_time crash_event_18hr flag_oos new_VIN year month dayofweek post_insp event_id* vehicles_in_accident


foreach i of numlist 1/4 {
 
  gen crash_single`i' = 1 if event_id`i' ~= 13 & event_id`i' ~= 98 & event_id`i' ~= . 	
  
  gen crash_multi`i' = 1 if event_id`i' == 13

 }
gen crash_single = (crash_single1 == 1)
replace crash_single = 1 if crash_single2 == 1 
replace crash_single = 1 if crash_single3 == 1
replace crash_single = 1 if crash_single4 == 1
replace crash_single = 1 if vehicles_in_accident == 1

gen crash_multi = (crash_multi1 == 1)
replace crash_multi = 1 if crash_multi2 == 1 
replace crash_multi = 1 if crash_multi3 == 1
replace crash_multi = 1 if crash_multi4 == 1
replace crash_multi = 1 if crash_single ~= 1 & vehicles_in_accident > 1 & vehicles_in_accident ~=.
replace crash_multi = 0 if crash_single == 1 

drop crash_single1 crash_single2 crash_single3 crash_single4 crash_multi1 crash_multi2 crash_multi3 crash_multi4 


compress

local insp_F
forvalues t = 12(-2)2{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}



reghdfe crash_multi `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 & flag_oos ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

reghdfe crash_multi post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 & flag_oos ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash_multi if e(sample)



reghdfe crash_single `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 & flag_oos ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

reghdfe crash_single post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1 & flag_oos ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash_single if e(sample)


* plot
use "$Truck/outputs/Log/plots/division_100pct/new/figures.dta",clear

rename var10 multi
rename var11 multi_lb
rename var12 multi_ub

rename var13 single
rename var14 single_lb
rename var15 single_ub

local multi_dep_mean = 0.0000409
local single_dep_mean = 2.19E-05

foreach var of varlist multi multi_ub multi_lb {
	gen pct_`var' = `var' / `multi_dep_mean' * 100
}
foreach var of varlist single single_ub single_lb {
	gen pct_`var' = `var' / `single_dep_mean' * 100
}
 
summ pct_multi if event_day < 0
gen avg_multi_F = r(mean)

summ pct_multi if event_day >= 0
gen avg_multi_L = r(mean)

summ pct_single if event_day < 0
gen avg_single_F = r(mean)

summ pct_single if event_day >= 0
gen avg_single_L = r(mean)


* 
qui foreach var of varlist avg_multi_F avg_multi_L avg_single_F avg_single_L {
summ `var'
local `var' = r(mean)
}

local color_multi "maroon"
local color_single "black"

twoway (rarea pct_multi_ub pct_multi_lb event_day if event_day <= -4, color(`color_multi'%20) lcolor(bg)) ///
	(rarea pct_multi_ub pct_multi_lb event_day if inrange(event_day,0,12), color(`color_multi'%20) lcolor(bg)) ///
	(rarea pct_single_ub pct_single_lb event_day if event_day <= -4, color(`color_single'%20) lcolor(bg)) ///
	(rarea pct_single_ub pct_single_lb event_day if inrange(event_day,0,12), color(`color_single'%20) lcolor(bg)) ///
	(connected pct_multi event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color_multi') lcolor(`color_multi') lpattern(solid) lwidth(medium)) ///
	(connected pct_single event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color_single') lcolor(`color_single') lpattern(dash) lwidth(medium)) ///
	(line avg_multi_F event_day if event_day <= 0, lcolor(`color_multi'%70) lwidth(thin) lpattern(dash)) ///
	(line avg_multi_L event_day if inrange(event_day,0,12), lcolor(`color_multi'%70) lwidth(thin) lpattern(dash)) ///
	(line avg_single_F event_day if event_day <= 0, lcolor(`color_single'%70) lwidth(thin) lpattern(dash)) ///
	(line avg_single_L event_day if inrange(event_day,0,12), lcolor(`color_single'%70) lwidth(thin) lpattern(dash)) ///
	(scatteri `avg_multi_F' .1 `avg_multi_L' .1 , recast(line) lp(solid) lcolor(`color_multi') lwidth(0.6)) ///
	(scatteri `avg_single_F' -.1 `avg_single_L' -.1 , recast(line) lp(solid) lcolor(`color_single') lwidth(0.6)), ///
	legend(order(5 "multi-vehicle" 6 "single-vehicle")) xlabel(-14(2)12) ylabel(-20(20)80, nogrid) ///
	text(12 4 "change=28.6%", place(c) color(`color_multi') ) ///
	text(50 4 "change=74.9%", place(c) color(`color_single') ) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(4.5)
	
graph export "plots/division_100pct/new/multi_vs_single.pdf", replace	








*----------------------------------------------------------------------------------------
* figure 5 and 6, Table A6

*** FARS 

local insp_list "post_insp insp_p insp_m year month dayofweek event_time new_VIN date new_group_id inspection insp_month insp_day insp_year inspection_id" 

local fars_list "crash_event_18hr harm_ev fatals st_case trav_sp dr_drink dr_cf1 dr_cf2 dr_cf3 dr_cf4 speedrel crash_date vin12 ve_forms veh_cf1 veh_cf2 mfactor"

local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

use `insp_list' `fars_list' `insp_F' using "$Truck/FARS/Insp_FARS_100pct.dta" if crash_event_18hr~=1 & inrange(event_time,-14,13), clear

* merge in flag_OOS
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", keepusing(oos_total) nogen keep(match master)

drop if oos_total > 0 & oos_total ~= .
drop oos_total

/* drop inspections during special event inspections + holidays */
if 1 {
gen insp_date = mdy(insp_month,insp_day,insp_year)
gen insp_dow = dow(date)

* 1. roadcheck 72 hours
gen roadcheck_day = 1 if insp_month == 6 & insp_day <= 8 & insp_day > 1 & insp_dow == 2

gen date_roadcheck_day = insp_date if roadcheck_day == 1
bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

replace roadcheck_day = 1 if inrange(insp_date,date_roadcheck + 1,date_roadcheck + 2)

* 2. brake week
gen brakeweek_day = 1 if insp_month == 9 & insp_day > 5 & insp_day <= 12 & insp_dow == 0

gen date_brakeweek_day = insp_date if brakeweek_day == 1
bysort insp_year: gegen date_brakeweek = max(date_brakeweek_day)
replace date_brakeweek = mdy(9,7,2017) if insp_year == 2017

replace brakeweek_day = 1 if inrange(insp_date, date_brakeweek, date_brakeweek+6)

* 3.  brake day
gen brakecheck_day = 1 if insp_month == 5 & insp_day <= 7 & insp_dow == 3
replace brakecheck_day = . if insp_year == 2018
replace brakecheck_day = 1 if insp_date == mdy(4,25,2018)

* 4. holidays 
gen holiday = 0
replace holiday = 1 if insp_month == 1 & insp_day == 1 // New Year
replace holiday = 1 if insp_month == 1 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // MLK
replace holiday = 1 if insp_month == 2 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // GW
replace holiday = 1 if insp_month == 5 & insp_dow == 1 & insp_day >= 25 & insp_day <= 31 // memorial
replace holiday = 1 if insp_month == 7 & insp_day==4 // indep
replace holiday = 1 if insp_month == 9 & insp_dow == 1 & insp_day >= 1 & insp_day <= 7 // memorial
replace holiday = 1 if insp_month == 10 & insp_dow == 1 & insp_day >= 8 & insp_day <= 14 // labor
replace holiday = 1 if insp_month == 11 & insp_day == 11 // veterans
replace holiday = 1 if insp_month == 11 & insp_dow == 4 & insp_day >= 22 & insp_day <= 28 // thanksgiving
replace holiday = 1 if insp_month == 11 & insp_dow == 5 & insp_day >= 23 & insp_day <= 29 // thanksgiving
replace holiday = 1 if insp_month == 12 & insp_day == 24 // xmas
replace holiday = 1 if insp_month == 12 & insp_day == 25 // xmas

* drop special event inspections + holiday
drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1 | holiday == 1

drop roadcheck_day brakeweek_day brakecheck_day holiday date_roadcheck_day date_roadcheck date_brakeweek_day date_brakeweek

}



gen fat_crash = 1 if fatals > 0 & fatals ~=.
replace fat_crash = 0 if fat_crash == .

local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}



reghdfe fat_crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

reghdfe fat_crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ fat_crash if e(sample)


* factors contributing
gen dr_behavior = 0
forvalues i = 1(1)4 {	
	replace dr_cf`i' = 99 if dr_cf`i' == .
	replace dr_behavior = 1 if dr_cf`i' ~= 0 & dr_cf`i' ~= 99
}

gen dr_speeding = 0
forvalues i = 1(1)4 {
	replace dr_speeding = 1 if dr_cf`i' == 44 | dr_cf`i' == 43 | dr_cf`i' == 46
}
replace dr_speeding = 1 if inrange(speedrel, 1, 5) 


replace dr_drink = 0 if dr_drink == .  


gen veh_factors = 0
replace veh_factors = 1 if inrange(veh_cf1,1,19)
replace veh_factors = 1 if inrange(veh_cf2,1,19)
replace veh_factors = 1 if inrange(mfactor,1,97)


* speed
* merge in speed limit
merge m:1 st_case vin12 date using "$Truck/processed_data/FARS/truck_FARS.dta", keep(master match) keepusing(sp_limit) nogen

replace trav_sp = . if trav_sp > 97 & year <= 2008
replace trav_sp = . if trav_sp > 997 & year > 2008

gen speeding = 0
replace speeding = 1 if trav_sp > sp_limit & trav_sp ~= .

* combine all driver behaviors 
gen fat_crash_dr = 0
replace fat_crash_dr = 1 if dr_behavior == 1
replace fat_crash_dr = 1 if dr_speeding == 1
replace fat_crash_dr = 1 if dr_drink == 1
replace fat_crash_dr = 1 if veh_factors == 1
replace fat_crash_dr = 1 if speeding == 1


preserve 

local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

keep fat_crash_dr insp_p insp_m date new_VIN `insp_F' year month dayofweek

reghdfe fat_crash_dr `insp_F' insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

reghdfe fat_crash_dr post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ fat_crash_dr if e(sample)

restore
	

* plot all truck related factors

preserve 

collapse (sum) veh dr_drink dr_speeding, by(var98) 

replace var98 = (var98 - 8) * 2

rename var98 event_day2

// copy in tot_fars numbers by 2 days

gen pct_veh = veh / tot_fars 
gen pct_speed = dr_speeding / tot_fars 
gen pct_drink = dr_drink / tot_fars 


graph bar pct_speed pct_drink pct_veh if inrange(event_day2,-14,12), over(event_day2) ///
	bar(1, fcolor("253 204 138") lcolor("253 204 138") lpattern(solid) lwidth(thin)) ///
	bar(2,  fcolor("145 191 219") lcolor("145 191 219") lpattern(solid) lwidth(thin)) ///
	bar(3,  fcolor("215 0 28") lcolor("215 0 28") lpattern(solid) lwidth(thin)) ///
	legend(order(1 "Speeding" 2 "DWI" 3 "Vehicle maintenance"))  ///
	ytitle("Number of fatal crashes") b1title(days) graphregion(color(white))

gen all_dr = veh+dr_drink+dr_speeding 
gen pct_dr = all_dr / tot_fars 

summ pct_dr if inrange(event_day2,-14,-2)
summ pct_dr if inrange(event_day2,0,12)

	
restore


* plot total number of fatal crashes
use "$Truck/outputs/Log/EventPlots/figures.dta", clear

summ tot_fars if inrange(event_day,-14,-2)
gen avg_F = r(mean)

summ tot_fars if inrange(event_day,0,12)
gen avg_L = r(mean)

* 
summ avg_F
local avg_F = r(mean)
summ avg_L
local avg_L = r(mean)


tw (bar tot_fars event_day if inrange(event_day,-14,12), fintensity(inten50) fcolor(navy%60) lcolor(navy*0) lwidth(thin)) ///
	(line avg_F event_day if inrange(event_day,-14,0), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(line avg_L event_day if inrange(event_day,0,12), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(scatteri `avg_F' 0 `avg_L' 0 , recast(line) lp(solid) lcolor(gs4) lwidth(0.4)), ///
	text(20 -10 "average = 15", place(c) color(gs4) ) ///
	text(57 9 "average = 52", place(c) color(gs4) ) ///
	ytitle("Number of fatal crashes") ylabel(0(15)60)  ///
	xtitle("day") xlabel(-14(2)12) legend(off) ///
	graphregion(color(white))
graph export "$Truck/writing/paper/figures/fars_tot.pdf", replace	
	

	
	
*--------------------------------------------------------------------------------
* figure 7
use "$Truck/processed_data/TxDOT_CRIS/Insp_Crash_Event_tx.dta", clear 

fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", keepusing(oos_total) nogen keep(match master)

drop if oos_total > 0 & oos_total ~= .
	
keep if crash_event_18hr ~= 1
keep if inrange(event_time,-14,13)


* look at contri factors: 
* driver factors: all contrib_factr except 0,1,2,74
gen dr_factor1 = 0

foreach var of varlist contrib_factr_1_id contrib_factr_2_id contrib_factr_3_id contrib_factr_p1_id contrib_factr_p2_id {

	replace dr_factor1 = 1 if inrange(`var', 3, 78)

}
gstats tab dr_factor1 if dr_factor1 == 1 & fed_record == 1 & inrange(event_time,-14,13) & crash_event_18hr ~= 1, by(event_2day) s(n)



* speeding (22,60,61), <200 cases
gen speeding = 0

foreach var of varlist contrib_factr_1_id contrib_factr_2_id contrib_factr_3_id contrib_factr_p1_id contrib_factr_p2_id {

	replace speeding = 1 if `var'== 22 | `var'== 60 | `var'== 61

}

gen event_2day = floor(event_time/2) *2 

gstats tab speeding if speeding == 1 inrange(event_time,-14,13) & crash_event_18hr ~= 1, by(event_2day) s(n)


gstats tab speeding if speeding == 1 & fed_record == 1 & inrange(event_time,-14,13) & crash_event_18hr ~= 1, by(event_2day) s(n)


* event study for driver-related accidents
gen crash_fed = crash_tx if fed_record == 1
replace crash_fed = 0 if crash_fed == .

replace crash_dr = crash_fed 
replace crash_dr = 0 if dr_factor1 == 0


local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

reghdfe crash_dr `insp_F' insp_p insp_m , absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

reghdfe crash_dr post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

summ crash_dr if e(sample)


* plot
// dr_related
use figures, clear

rename var99 dr_related
rename var100 speeding

summ dr_related if inrange(event_day,-14,-2)
gen avg_p50_F = r(mean)

summ dr_related if inrange(event_day,0,12)
gen avg_p50_L = r(mean)

format avg_p50_F %9.1f
format avg_p50_L %9.1f

* 
summ avg_p50_F
local avg_p50_F = round(r(mean),1)
summ avg_p50_L
local avg_p50_L = round(r(mean),1)

twoway (bar dr_related event_day if inrange(event_day,-14,12), fcolor("153 142 195") lcolor("153 142 195") lpattern(solid) lwidth(thin)) ///
	(bar speeding event_day if inrange(event_day,-14,12), fcolor("241 163 64") lcolor("241 163 64") lpattern(solid) lwidth(thin)) ///
	(line avg_p50_F event_day if inrange(event_day,-14,0), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(line avg_p50_L event_day if inrange(event_day,0,12), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(scatteri `avg_p50_F' 0 `avg_p50_L' 0 , recast(line) lp(solid) lcolor(gs4) lwidth(0.6)), ///
	text(23 -8 "average = `avg_p50_F'", place(c) color(gs4) size(small)) ///
	text(36 6 "average = `avg_p50_L'", place(c) color(gs4) size(small)) ///
	legend(order(2 "speeding" 1 "driver related factors" )) ytitle("Number of crashes") xlabel(-14(2)12) xtitle("day")  ///
	graphregion(color(white))

graph export "$Truck/writing/paper/figures/txdot_dr.pdf", replace	


* event study
use "$Truck/r&r/new results/revision_plots.dta",clear 

rename (var57 var58 var59) (txdot lb_txdot ub_txdot)

local mean = 0.00000913
foreach var of varlist txdot lb_txdot ub_txdot {
	replace `var' = `var' / `mean' * 100
}

summ txdot if inrange(event_day,-14,-2)
gen txdot_F = r(mean)

summ txdot if inrange(event_day,0,12)
gen txdot_L = r(mean)

*
summ txdot_F
local txdot_F = r(mean)
summ txdot_L
local txdot_L = r(mean)

local color_treat "black"

local var txdot
local xaxis event_day
twoway (rarea lb_`var' ub_`var' event_day if event_day <= -4, color(`color_treat'%20) lcolor(bg)) ///
	(rarea lb_`var' ub_`var' event_day if inrange(event_day,0,12), color(`color_treat'%20) lcolor(bg)) ///
	(connected `var' event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color_treat') lcolor(`color_treat') lpattern(solid) lwidth(medium)) ///
	(line txdot_F event_day if inrange(event_day,-14,0), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(line txdot_L event_day if inrange(event_day,0,12), lcolor(gs4) lwidth(thin) lpattern(dash)) ///
	(scatteri `txdot_L' 0 `txdot_F' 0 , recast(line) lp(solid) lcolor(gs10%50) lwidth(0.6)), ///
	legend(off) xlabel(-14(2)12, nogrid) ylabel(-40(40)160, nogrid) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(6)
	
graph export "$Truck/r&r/new results/txdot_dr_es.pdf", replace	









*-------------------------------------------------------------------------------
* Figure 8,9,10,11

*** look at inspection schedule (time-of-day or day-of-week) across states

use "InspectionData/Insp_by_year/Insp_100pct.dta", clear

drop location
gen inspection = 1

replace insp_year = floor(insp_date/10000)
gen insp_month = floor((insp_date - insp_year*10000)/100)
gen insp_day = insp_date - insp_year * 10000 - insp_month * 100

gen date = mdy(insp_month,insp_day,insp_year)

gen insp_start_hr = floor(insp_start_time/100)
gen insp_dow = dow(date)

drop if insp_facility == "N" | insp_facility == "" |  insp_facility == "NONE"

compress

* day of week

gcollapse (sum) inspection, by(insp_state insp_countyfips date insp_facility insp_year insp_dow insp_month insp_day)

reshape wide inspection, i(insp_state insp_countyfips date insp_year insp_dow insp_month insp_day) j(insp_facility) string

drop if insp_state == "US"
drop if strlen(insp_countyfips) < 5
 
compress

cap mkdir "InspectionData/Insp_by_year/temp"
save "InspectionData/Insp_by_year/temp/Insp_100pct_dow.dta", replace

* balanced panel
duplicates drop insp_state insp_countyfips insp_year, force
keep insp_countyfips insp_state insp_year

expand 12

bys insp_countyfips insp_state insp_year: gen insp_month = _n

expand 31

bys insp_countyfips insp_state insp_year insp_month: gen insp_day = _n

gen date = mdy(insp_month, insp_day, insp_year)
drop if date == .

gen insp_dow = dow(date)

merge 1:1 insp_state insp_countyfips date using "InspectionData/Insp_by_year/temp/Insp_100pct_dow.dta", assert(master match) nogen

replace inspectionF = 0 if inspectionF == .
replace inspectionR = 0 if inspectionR == .

gen insp_statefips = substr(insp_countyfips,1,2)
*destring insp_statefips, replace

destring insp_countyfips, replace

* day of week
reghdfe inspectionF, absorb(insp_countyfips#insp_year#insp_dow) residuals(res_dowF) nosample
reghdfe inspectionR, absorb(insp_countyfips#insp_year#insp_dow) residuals(res_dowR) nosample

* variance of residual by stateXyear
bys insp_state insp_year: gegen v_res_dowF = sd(res_dowF)
bys insp_state insp_year: gegen v_res_dowR = sd(res_dowR)



* look at fraction of total 
bysort insp_state insp_year: gegen sum_insp_F = sum(inspectionF)
bysort insp_state insp_year: gegen sum_insp_R = sum(inspectionR)


gcollapse (mean) sum* v_res*, by(insp_state insp_statefips insp_year)

replace v_res_dowF = . if v_res_dowF == 0
replace v_res_dowR = . if v_res_dowR == 0


compress
save "InspectionData/Insp_by_year/Insp_100pct_dow.dta", replace


* merge v_res index to event data

* 100pct event study:

local insp_list inspection_id post_insp insp_p insp_m year month dayofweek event_time new_VIN insp_countyfips date new_group_id insp_year
local crash_list crash crash_event_18hr event_id* vehicles_in_accident

use `insp_list' `crash_list' using "EventData/division_100pct/Insp_Crash_100pct_pre.dta",clear

* drop oos trucks
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", keepusing(oos_total) nogen keep(ma-tch master)

drop if oos_total > 0 & oos_total ~= .
drop oos_total


gen insp_statefips = substr(insp_countyfips,1,2) if strlen(insp_countyfips) == 5

* merge in stateXyear predictability index
merge m:1 insp_statefips insp_year using "InspectionData/Insp_by_year/Insp_100pct_dow.dta", keepusing(v_res_dowF) keep(master match) nogen 

* merge in stateXyear randomness index
merge m:1 insp_statefips insp_year using "InspectionData/Insp_by_year/Insp_100pct_Dtime.dta", keep(master match) nogen 

compress
save "EventData/division_100pct/Insp_Crash_100pct_schedule.dta", replace

* plot figure 8
clear all

set seed 1155665

set obs 2

gen state = _n

expand 4

bys state: gen week = _n

expand 7

bys state week: gen dow = _n -1


* state A 

gen inspection = 10 if state == 1 & dow == 0
replace inspection = 60 if state == 1 & dow == 1
replace inspection = 80 if state == 1 & dow == 2
replace inspection = 100 if state == 1 & dow == 3
replace inspection = 80 if state == 1 & dow == 4
replace inspection = 60 if state == 1 & dow == 5
replace inspection = 20 if state == 1 & dow == 6

* state B 
qui summ inspection

replace inspection = rnormal(`r(mean)',50) if state == 2
replace inspection = round(inspection) if state == 2
replace inspection = 0 if inspection < 0

* label day of week
label define dow 0 "Sun" 1 "Mon" 2 "Tue" 3 "Wed" 4 "Thur" 5 "Fri" 6 "Sat", replace
label values dow "dow"

bys state: gen date = _n

tw bar inspection dow if state == 1 & inrange(week,1,2), by(week, graphregion(color(white))) fcolor(red%20) lcolor(red) xlabel(0(1)6) ylabel(0(20)100, nogrid) xtitle("") xsize(6) xlabel(,valuelabel)

graph export "$Truck/writing/slides/Oct2019/dow_schedule1.pdf", replace


tw bar inspection dow if state == 2 & inrange(week,1,2), by(week, graphregion(color(white))) fcolor(blue%20) lcolor(blue) xlabel(0(1)6) ylabel(0(25)150, nogrid) xtitle("") xsize(6) xlabel(,valuelabel)

graph export "$Truck/writing/slides/Oct2019/dow_schedule2.pdf", replace

* calculate v_res for state 2
reghdfe inspection, absorb(state#dow) residuals(res_dowF) nosample

bys state: gegen v_res_dowF = sd(res_dowF)



* map in figure 9
use "$Truck/processed_data/InspectionData/Insp_by_year/Insp_100pct_schedule.dta", clear

rename insp_state state

maptile v_res_dowF if insp_year == 2017, geo(state) n(10)  rangecolor(222,235,247 49,130,189) legdecimals(1)


graph export "$Truck/outputs/Log/plots/division_100pct/map_v_res.pdf", replace	


* plot figure 10
clear all

set seed 1155665

set obs 3650
gen x = _n/10

gen upper_y1 = 70
gen lower_y1 = 60

gen random = uniform()

gen red1 = (x >= 365*0.75)
replace red1 = (random <= 0.2) if red1 == 1

gen red2 = (random <= 0.05)

gen upper_y2 = 30
gen lower_y2 = 20


twoway rcap upper_y1 lower_y1 x, msize(0) lcolor(gs13) || ///
	rcap upper_y1 lower_y1 x if red1 == 1, msize(0) lw(0.05) lcolor(red) || ///
	rcap upper_y2 lower_y2 x, msize(0) lcolor(gs13) || ///
	rcap upper_y2 lower_y2 x if red2 == 1, msize(0) lw(0.05) lcolor(red) || ///
	scatteri 75 319 60 319, recast(line) lp(shortdash) lcolor(red) lwidth(0.3) || ///
	scatteri 35 180 20 180, recast(line) lp(shortdash) lcolor(red) lwidth(0.3) ///
	ylabel(10(20)80, nogrid nolabels noticks) yscale(lstyle(none)) ///
	legend(off) xsize(5) xlabel(1 30(30)330 365) xtitle("days") ///
	graphregion(color(white)) ///
	text(55 180 "State A: long average reinspection interval", size(4) place(c) color(black) ) text(15 180 "State B: short average reinspection interval", size(4) place(c) color(black) ) ///
	text(77 319 "average", size(3.5) place(c) color(red) ) text(37 180 "average", size(3.5) place(c) color(red) ) 
graph export "$Truck/writing/slides/Dec2020/new figures/reinsp_time.pdf", replace	
	


* map in figure 11
use "$Truck/processed_data/InspectionData/Insp_by_year/Insp_100pct_Dtime.dta",clear

drop if insp_state == ""

rename insp_state state

maptile D_insp_date_p50 if insp_year == 2017, geo(state) cutvalues(90 150 210 270 365) fcolor(Greens) legdecimals(1) twopt(legend(order(2 "< 3month" 3 "3m - 5m" 4 "5m - 7m" 5 "7m - 9m" 6 "9m - 12m" 7 "> 1 year" )))

graph export "$Truck/outputs/Log/plots/division_100pct/map_Dtime.pdf", replace	





*-------------------------------------------------------------------------------------------------
* table 1

*** panel a: inspections
use inspection new_VIN flag_oos using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if flag_oos ~= 1, clear

count if inspection == 1  // weigh station inspections

preserve 
duplicates drop new_VIN, force // trucks ever inspected
restore 

use "$Truck/processed_data/InspectionData/Insp_by_year/temp/Insp_100pct_VIN.dta", clear

count if inspection == 1 // total inspections (including roadside random insp)


* panel a: truck volume
use count_added truck_count year flag_oos event_time crash_event_18hr using "$Truck/Insp_Crash_100pct_traff.dta" if year >= 2012 & flag_oos ~=1 & event_time == 0 & crash_event_18hr ~= 1,clear

* drop outlier
qui summ count_added, detail
drop if count_added > r(p99)

gstats tab truck_count if truck_count ~= 0 , s(n mean sd min p50 max)


* panel a: violations
use viol_total oos_total driver_viol_total vehicle_viol_total event_time crash_event* using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta", clear


gstats tab oos_total if oos_total > 0 & oos_total ~= . & event_time == 0 & crash_event_18hr ~= 1, s(n mean sd min p50 max)

gstats tab driver_viol_total if oos_total == 0 & driver_viol_total> 0 & driver_viol_total ~= . & event_time == 0 & crash_event_18hr ~= 1, s(n mean sd min p50 max)

gstats tab vehicle_viol_total if oos_total == 0 & vehicle_viol_total> 0 & vehicle_viol_total ~= . & event_time == 0 & crash_event_18hr ~= 1, s(n mean sd min p50 max)

use oos_total date event_time insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number insp_countyfips  using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if oos_total == 56 & event_time == 0, clear




*** panel B: daily
local insp_list "post_insp insp_p insp_m year month dayofweek event_time new_VIN date new_group_id insp_time inspection"

local crash_list "crash injuries fatalities event_id* vehicles_in_accident crash_time crash_event* flag_oos"

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if flag_oos ~= 1, clear


gstats tab crash injuries fatalities if inrange(event_time,-14,13) & crash_event_18hr ~= 1, s(n mean sd min p50 max)




*** panel c: monthly 
use crash injuries fatalities event_month insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number insp_year insp_month event_yearmonth using "$Truck/processed_data/EventData/division_100pct/temp/Insp_Crash_month2_100pct.dta", clear

* correct vin names
if 1 {
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "SAME" | insp_unit_vehicle_id_number == "UNREADABLE" | insp_unit_vehicle_id_number == "NO VIN" | insp_unit_vehicle_id_number == "RR" | insp_unit_vehicle_id_number == "DRIVER/CARRIER"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "UKNOWN" 
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "UKN" 
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "000000"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "000"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "0"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "."
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "//"	
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "CARRIER" | insp_unit_vehicle_id_number == "DRIVER"

replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"NONE")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"0000000")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"TEMP")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"UNK")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"**")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"--")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"- - -")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"...")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"///")>0
replace insp_unit_vehicle_id_number = "" if strlen(insp_unit_vehicle_id_number)<5
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"#")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"NOT")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"white")>0


replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "CARRIER"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "DRIVER"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "UNK"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "NONE"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "UNKNOWN"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "NA"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "N/A"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "00000000000000000"
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"RYDER")>0
replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"9999999")>0
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "PENSKE"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "FLEET"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "INTERCHANGEABLE"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "unknown"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "0"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "00"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "000"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "0000"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "00000"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "000000"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "1"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "//"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "``"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "XTRALEASE"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "XTRA LEASE"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "UNREADABLE"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "UK"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "SHIPPER"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "SAME"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "NOTFOUND"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "MAPPER"

replace insp_unit_license_state = "" if insp_unit_license_state == "UK"
replace insp_unit_license = "" if insp_unit_license == "UNKNOWN"
replace insp_unit_license = "" if insp_unit_license == "UK"
replace insp_unit_license = "" if insp_unit_license == "UNK"
replace insp_unit_license = "" if insp_unit_license == "9"
replace insp_unit_license = "" if insp_unit_license == "UNK"
replace insp_unit_license = "" if insp_unit_license == "NONE"
replace insp_unit_license = "" if insp_unit_license == "TEMP"
replace insp_unit_license = "" if insp_unit_license == "NA"
replace insp_unit_license = "" if insp_unit_license == "N/A"
replace insp_unit_license = "" if insp_unit_license == "NEW"
replace insp_unit_license = "" if strpos(insp_unit_license,"NO")>0
replace insp_unit_license = "" if strpos(insp_unit_license,"T.O.P")>0
replace insp_unit_license = "" if strpos(insp_unit_license,"TOP")>0
replace insp_unit_license = "" if strpos(insp_unit_license,"ONLY")>0
replace insp_unit_license = "" if strpos(insp_unit_license,"TEMP")>0
replace insp_unit_license = "" if insp_unit_license == "PENDING"
replace insp_unit_license = "" if insp_unit_license == "VIN"
replace insp_unit_license = "" if insp_unit_license == "APPLIED"
replace insp_unit_license = "" if insp_unit_license == "INTRANSIT"
replace insp_unit_license = "" if insp_unit_license == "L9999"
replace insp_unit_license = "" if insp_unit_license == "PERMIT"
}

* gen leads and lags
drop if insp_unit_license == "" & insp_unit_vehicle_id_number == ""

gegen new_license = group(insp_unit_license_state insp_unit_license), missing
replace new_license = 0 if new_license == .
replace new_license = - new_license 

gegen new_VIN = group(insp_unit_vehicle_id_number)
replace new_VIN = new_license if insp_unit_vehicle_id_number == ""

bys new_VIN insp_year insp_month event_month: gen N = _n
	** the case is that VIN number is mis-recorded
drop if N > 1  // drop 0.2%

* generate event id
gegen new_group_id = group(new_VIN insp_year insp_month)
replace new_group_id = 0 if new_group_id == .
replace new_group_id = . if event_yearmonth == .


* outside event window insp indicator
bys new_VIN: gegen last_insp = max(ym(insp_year,insp_month))
bys new_VIN: gegen first_insp = min(ym(insp_year,insp_month))

* drop (-12,24) of (first, last) insp
gen start = first_insp + ym(1960,12)
gen end = last_insp - 2*ym(1960,12)

gen insp_yearmonth = ym(insp_year, insp_month)

gen within = 1 if inrange(insp_yearmonth, start,end)

preserve 
keep if within == 1

* drop outliers of crash
egen crash_max = max(crash), by(new_group_id)
summ crash_max if crash_max ~=0, detail
drop if crash_max > r(p99)

gstats tab crash injuries fatalities if inrange(event_month,-12,24), s(n mean sd min p50 max)


*** panel d: carriers 
use "CensusData/carrier_census", clear 

gstats tab tot_pwr tot_drs , s(n mean sd min p50 max)









*-----------------------------------------------------------------------------------
* Table 3 and table A19

* predictability and reinspection index *

use crash post_insp insp_p insp_m year month dayofweek event_time crash_event_18hr new_VIN D_insp_date_p50 sum_insp_F v_res_dowF insp_statefips using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_schedule.dta", clear

* oos trucks already dropped

destring insp_statefips, gen(state)


local varlist "dowF"
foreach var of local varlist {

	qui summ v_res_`var' 
	gen std_v_`var'  = (v_res_`var'  - `r(mean)')/`r(sd)'
	
	gen I_post_`var' = post_insp * std_v_`var'
}


qui summ D_insp_date_p50
gen std_Dtime = (D_insp_date_p50 - `r(mean)')/`r(sd)'

gen I_post_std_Dtime = post_insp * std_Dtime


reghdfe crash post_insp std_v_dowF I_post_dowF insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash std_v_dowF if e(sample)

reghdfe crash post_insp std_Dtime I_post_std_Dtime insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if e(sample)


* Table A19
* control for total number of inspections (frequency)
* vmt
preserve 

use "$Truck/processed_data/FHWA/vmt.dta", clear

* merge in state fips
merge m:1 state_name using "$Truck/processed_data/Geo_files/state_fips.dta", nogen keep(master match)

drop if insp_statefips == .

rename insp_statefips state

drop insp_state state_name

tempfile vmt
save `vmt'

restore

* vmt in millions
fmerge m:1 state year using `vmt', keep(master match) nogen

* gen insp_freq 
gen insp_freq = sum_insp_F / tot_vmt 

* control for sum_insp_F
qui summ insp_freq
gen std_suminsp = (insp_freq - `r(mean)')/`r(sd)'

gen I_post_suminsp = post_insp * std_suminsp


reghdfe crash post_insp std_v_dowF I_post_dowF std_suminsp I_post_suminsp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash std_v_dowF if e(sample)

reghdfe crash post_insp std_Dtime I_post_std_Dtime std_suminsp I_post_suminsp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if e(sample)











